ACS2 in Multiplexer



In [1]:

    
%matplotlib inline

# General
from __future__ import unicode_literals

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr

import numpy as np
import pandas as pd

# Logger
import logging
logging.basicConfig(level=logging.WARN)

# ALCS + custom environments
import sys, os
sys.path.append(os.path.abspath('../../..'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList
from lcs.metrics import population_metrics

# Load environments
import gym
import gym_multiplexer

Multiplexer



In [2]:

    
# Create the 20-bit boolean multiplexer environment
# (the environment id is presumably registered by the `gym_multiplexer` import).
mp = gym.make('boolean-multiplexer-20bit-v0')
# reset() hands back the initial observation of a new trial
situation = mp.reset()

# render phenotype (left as the last expression so the notebook displays it)
mp.render()









    



[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0]



In [3]:

    
# Perform a random action sampled from the environment's action space.
# step() returns a 4-tuple; its last element is not needed here and is discarded.
state, reward, done, _ = mp.step(mp.action_space.sample())
print(f"New state: {state}, reward: {reward}, is done: {done}")









    



New state: [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1], reward: 1000, is done: True

Environment adapter



In [4]:

    
class MultiplexerAdapter(EnvironmentAdapter):
    """Adapter turning multiplexer observations into the agent's representation."""

    @staticmethod
    def to_genotype(phenotype):
        """Return a list holding ``str(x)`` for every element of ``phenotype``."""
        return list(map(str, phenotype))



In [5]:

    
# Convert the state observed after the random step into a list of strings ...
genotype = MultiplexerAdapter().to_genotype(state)
# ... and join it into a single string so the cell displays it compactly.
''.join(genotype)









    Out[5]:





'001000000110010110101'

Go agent, go...

Perform the experiment for a number of explore/exploit trials.



In [6]:

    
def _make_mp_actors(env_id):
    """Build an ACS2 agent together with the multiplexer environment ``env_id``.

    Shared implementation for the ``get_*bit_mp_actors`` factories, which
    differ only in the environment id they request.

    Parameters
    ----------
    env_id : str
        Identifier passed to ``gym.make``.

    Returns
    -------
    tuple
        ``(agent, env)`` - a freshly configured ``ACS2`` agent and the
        environment it was sized for.
    """
    mp = gym.make(env_id)
    cfg = Configuration(
        # First argument is taken from the size of the wrapped environment's
        # observation space; the second (2) is presumably the number of
        # possible actions - TODO confirm against the Configuration signature.
        mp.env.observation_space.n, 2,
        environment_adapter=MultiplexerAdapter(),
        user_metrics_collector_fcn=population_metrics,
        do_ga=True)

    return ACS2(cfg), mp


def get_6bit_mp_actors():
    """Return ``(agent, env)`` for the 6-bit boolean multiplexer."""
    return _make_mp_actors('boolean-multiplexer-6bit-v0')


def get_11bit_mp_actors():
    """Return ``(agent, env)`` for the 11-bit boolean multiplexer."""
    return _make_mp_actors('boolean-multiplexer-11bit-v0')


def get_20bit_mp_actors():
    """Return ``(agent, env)`` for the 20-bit boolean multiplexer."""
    return _make_mp_actors('boolean-multiplexer-20bit-v0')



In [7]:

    
def perform_experiment(agent, env, trials=250_000):
    """Run ``agent.explore_exploit(env, trials)`` and print a short summary.

    Parameters
    ----------
    agent
        Object exposing ``explore_exploit(env, trials)`` that returns a
        ``(population, metrics)`` pair.
    env
        Environment handed to the agent unchanged.
    trials : int
        Number of trials requested from the agent.

    Returns
    -------
    tuple
        The ``(population, metrics)`` pair exactly as returned by the agent.

    Notes
    -----
    ``population`` must be iterable and yield classifiers providing
    ``is_reliable()``, ``fitness``, ``q`` and ``exp``. ``metrics`` is a
    sequence of dicts; the last one must contain the ``'population'`` and
    ``'reliable'`` keys.
    """
    population, metrics = agent.explore_exploit(env, trials)

    # The agent may return no metrics at all (e.g. when trials == 0);
    # indexing metrics[-1] would then raise IndexError, so guard the summary.
    if metrics:
        last = metrics[-1]
        print("Population size: {}".format(last['population']))
        print("Reliable size: {}".format(last['reliable']))
        print(last)
    else:
        print("No metrics collected.")

    # Reliable classifiers ordered from the highest fitness to the lowest
    reliable_classifiers = sorted(
        (c for c in population if c.is_reliable()),
        key=lambda cl: -cl.fitness)

    # Print top 10 reliable classifiers
    for cl in reliable_classifiers[:10]:
        print(f"{cl}, q: {cl.q:.2f}, fit: {cl.fitness:.2f}, exp: {cl.exp:.2f}")

    return population, metrics

For meaningful results you will probably want to run these experiments for about 250k trials; the `TRIALS` value below is kept much smaller so that the notebook runs quickly.



In [8]:

    
# Number of trials passed to every perform_experiment(...) call below
TRIALS = 5_000

6-bit MPX



In [9]:

    
%%time
# Run the 6-bit experiment; keep the final population (p6) and the metrics (m6).
p6, m6 = perform_experiment(*get_6bit_mp_actors(), trials=TRIALS)









    



Population size: 160
Reliable size: 53
{'trial': 4995, 'steps_in_trial': 1, 'reward': 1000, 'population': 160, 'numerosity': 351, 'reliable': 53}
10##0## 0 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 197 tga: 4886  talp: 4924  tav: 21.3   num: 14, q: 1.00, fit: 1000.00, exp: 197.00
01#0### 0 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 196 tga: 4880  talp: 4992  tav: 26.3   num: 15, q: 1.00, fit: 1000.00, exp: 196.00
01#1### 1 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 206 tga: 4902  talp: 4968  tav: 23.0   num: 12, q: 1.00, fit: 1000.00, exp: 206.00
0#11### 1 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 206 tga: 4902  talp: 4972  tav: 21.5   num: 5, q: 1.00, fit: 1000.00, exp: 206.00
10##1## 1 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 183 tga: 4906  talp: 4994  tav: 20.5   num: 14, q: 1.00, fit: 1000.00, exp: 183.00
11###1# 1 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 158 tga: 4984  talp: 4998  tav: 19.7   num: 15, q: 1.00, fit: 1000.00, exp: 158.00
000###0 0 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 203 tga: 4948  talp: 4996  tav: 21.4   num: 4, q: 1.00, fit: 1000.00, exp: 203.00
#00#0## 0 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 183 tga: 4948  talp: 4948  tav: 22.5   num: 2, q: 1.00, fit: 1000.00, exp: 183.00
11###0# 0 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 152 tga: 4914  talp: 4988  tav: 20.6   num: 6, q: 1.00, fit: 1000.00, exp: 152.00
11###00 0 ######1          (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 204 tga: 4914  talp: 4988  tav: 20.6   num: 7, q: 1.00, fit: 1000.00, exp: 204.00
CPU times: user 7.11 s, sys: 17.6 ms, total: 7.12 s
Wall time: 7.15 s

11-bit MPX



In [10]:

    
%%time
# Run the 11-bit experiment; keep the final population (p11) and the metrics (m11).
p11, m11 = perform_experiment(*get_11bit_mp_actors(), trials=TRIALS)









    



Population size: 1575
Reliable size: 81
{'trial': 4995, 'steps_in_trial': 1, 'reward': 1000, 'population': 1575, 'numerosity': 1726, 'reliable': 81}
#11###0###00 0 ###########1     (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 117 tga: 4884  talp: 4914  tav: 44.4   num: 8, q: 1.00, fit: 1000.00, exp: 117.00
0#1#0#0####0 0 ###########1     (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 111 tga: 4912  talp: 4912  tav: 46.3   num: 8, q: 1.00, fit: 1000.00, exp: 111.00
0#00#0#####0 0 ###########1     (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 88  tga: 4926  talp: 4998  tav: 43.3   num: 7, q: 1.00, fit: 1000.00, exp: 88.00
111#######1# 1 ###########1     (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 93  tga: 4908  talp: 4950  tav: 41.1   num: 11, q: 1.00, fit: 999.99, exp: 93.00
1#0####1#1#0 1 ###########1     (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 76  tga: 4976  talp: 4976  tav: 45.4   num: 5, q: 1.00, fit: 999.96, exp: 76.00
0#1#1#1####0 1 ###########1     (empty)               q: 1.0   r: 999.9  ir: 999.9  f: 999.9  exp: 66  tga: 4970  talp: 4980  tav: 46.8   num: 8, q: 1.00, fit: 999.93, exp: 66.00
001#1######0 1 ###########1     (empty)               q: 1.0   r: 999.9  ir: 999.9  f: 999.9  exp: 70  tga: 4970  talp: 4970  tav: 43.9   num: 1, q: 1.00, fit: 999.90, exp: 70.00
00#11######0 1 ###########1     (empty)               q: 1.0   r: 999.9  ir: 999.9  f: 999.9  exp: 71  tga: 4860  talp: 4876  tav: 44.9   num: 2, q: 1.00, fit: 999.89, exp: 71.00
#10##1###1#0 1 ###########1     (empty)               q: 1.0   r: 999.9  ir: 999.9  f: 999.9  exp: 72  tga: 4904  talp: 4904  tav: 46.4   num: 6, q: 1.00, fit: 999.87, exp: 72.00
010##0###### 0 ###########1     (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 999.8  exp: 59  tga: 4846  talp: 4998  tav: 51.3   num: 8, q: 1.00, fit: 999.84, exp: 59.00
CPU times: user 34.3 s, sys: 56.3 ms, total: 34.3 s
Wall time: 34.4 s

20-bit MPX



In [ ]:

    
%%time
# Run the 20-bit experiment; keep the final population (p20) and the metrics (m20).
p20, m20 = perform_experiment(*get_20bit_mp_actors(), trials=TRIALS)



In [ ]:

    
def parse_metrics(metrics):
    """Turn a sequence of metric dicts into a DataFrame indexed by trial.

    Every dict must provide the ``'trial'``, ``'numerosity'``, ``'reliable'``
    and ``'reward'`` keys; other keys are ignored. The result has ``trial`` as
    its index and the remaining three values as columns.
    """
    columns = ['trial', 'numerosity', 'reliable', 'reward']
    rows = [[record[name] for name in columns] for record in metrics]

    return pd.DataFrame(rows, columns=columns).set_index('trial')



In [ ]:

    
# parse metrics to df
df6bit = parse_metrics(m6)
df11bit = parse_metrics(m11)
df20bit = parse_metrics(m20)

Number of reliable classifiers



In [ ]:

    
# Size of the rolling-mean window used to smooth the curves
window=50

fig, ax = plt.subplots()

# One smoothed curve per multiplexer size, drawn on the shared axes
for label, frame in (('6-bit', df6bit), ('11-bit', df11bit), ('20-bit', df20bit)):
    smoothed = frame['reliable'].rolling(window=window).mean()
    smoothed.plot(label=label, linewidth=1.0, ax=ax)

ax.set_xlabel('Trial')
ax.set_ylabel('Reliable classifiers')
ax.set_title(f'Number of reliable classifiers for boolean MPX.\nResults averaged over {window} trials')

plt.legend()
plt.show()

Average reward



In [ ]:

    
# Size of the rolling-mean window used to smooth the curves
window=250

fig, ax = plt.subplots()

# One smoothed reward curve per multiplexer size, drawn on the shared axes
for label, frame in (('6-bit', df6bit), ('11-bit', df11bit), ('20-bit', df20bit)):
    smoothed = frame['reward'].rolling(window=window).mean()
    smoothed.plot(label=label, linewidth=1.0, ax=ax)

# Dotted reference line at a reward of 1000
plt.axhline(1000, c='black', linewidth=1.0, linestyle=':')

ax.set_xlabel('Trial')
ax.set_ylabel('Reward')
ax.set_title(f'Reward obtained.\nResults averaged over {window} trials')
ax.set_ylim([500, 1050])

plt.legend()
plt.show()